I. Preliminaries

Loading libraries

library("tidyverse")
library("tibble")
library("msigdbr")
library("ggplot2")
library("TCGAbiolinks")
library("RNAseqQC")
library("DESeq2")
library("ensembldb")
library("purrr")
library("magrittr")
library("vsn")
library("matrixStats")
library("dplyr")
library("grex")
library("survminer")
library("survival")

II. Downloading the TCGA gene expression data

Create a function for downloading TCGA gene expression data.

For more detailed documentation, refer to 2. Differential Gene Expression Analysis - TCGA.Rmd.

# Directory (relative to this notebook) that holds the files fetched from the GDC.
GDC_DIR <- "../data/public/GDCdata"

# Query the GDC for the open-access STAR-count RNA-seq files of one project,
# keeping only the cases that have BOTH a "Primary Tumor" and a
# "Solid Tissue Normal" sample.
#
# Args:
#   project: GDC project identifier, e.g. "TCGA-COAD".
#
# Returns:
#   A list with two elements:
#     samples       - rows of the tumor query results followed by rows of the
#                     normal query results, restricted to the matched cases;
#     query_project - the GDCquery() result restricted to the barcodes
#                     (sample.submitter_id) of those samples.
#
# Raises:
#   An error when no case has both sample types; the caller's tryCatch()
#   treats any error as "skip this project".
query_and_filter_samples <- function(project) {
  # All three queries differ only in the sample type(s) and the optional
  # barcode filter, so they are built in one place.
  run_query <- function(sample_type, barcode = NULL) {
    query_args <- list(
      project = project,
      data.category = "Transcriptome Profiling",
      data.type = "Gene Expression Quantification",
      experimental.strategy = "RNA-Seq",
      workflow.type = "STAR - Counts",
      access = "open",
      sample.type = sample_type
    )
    if (!is.null(barcode)) {
      query_args$barcode <- barcode
    }
    do.call(GDCquery, query_args)
  }

  tumor <- getResults(run_query("Primary Tumor"))
  normal <- getResults(run_query("Solid Tissue Normal"))

  # Cases that contributed both a tumor and a normal sample.
  matched_cases <- intersect(tumor$cases.submitter_id, normal$cases.submitter_id)
  if (length(matched_cases) == 0) {
    stop(
      "No cases in ", project,
      " have both a primary tumor and a solid tissue normal sample",
      call. = FALSE
    )
  }

  tumor <- tumor[tumor$cases.submitter_id %in% matched_cases, , drop = FALSE]
  normal <- normal[normal$cases.submitter_id %in% matched_cases, , drop = FALSE]

  samples <- rbind(tumor, normal)
  rownames(samples) <- NULL

  query_project <- run_query(
    c("Solid Tissue Normal", "Primary Tumor"),
    barcode = as.list(samples$sample.submitter_id)
  )

  # If this is your first time running this notebook (i.e., you have not yet
  # downloaded the results of the query above), uncomment the code block below

  # GDCdownload(
  #   query_project,
  #   directory = GDC_DIR
  # )

  list(samples = samples, query_project = query_project)
}

Download the TCGA gene expression data for colon adenocarcinoma (TCGA-COAD).

projects <- c("TCGA-COAD")

# Projects whose query succeeded; the later sections iterate over these only.
with_results_projects <- c()

# Per-project sample tables and GDC queries, keyed by project identifier.
samples <- list()
project_data <- list()

for (project in projects) {
  # A failing query skips the project, but the reason is reported instead of
  # being discarded.
  result <- tryCatch(
    query_and_filter_samples(project),
    error = function(e) {
      message("Skipping ", project, ": ", conditionMessage(e))
      NULL
    }
  )
  if (is.null(result)) next

  samples[[project]] <- result$samples
  project_data[[project]] <- result$query_project
  with_results_projects <- c(with_results_projects, project)
}

Running the code block above with the GDCdownload call uncommented should generate and populate a directory named GDCdata.

III. Data preprocessing

Construct the RNA-seq count matrix for each cancer type.

tcga_data <- list()
tcga_matrix <- list()

# From here on, work only with the projects whose query succeeded.
projects <- with_results_projects

# Read the downloaded files of each project into a SummarizedExperiment.
for (project in projects) {
  tcga_data[[project]] <- GDCprepare(
    project_data[[project]],
    directory = GDC_DIR,
    summarizedExperiment = TRUE
  )
}

# Extract the "unstranded" count assay and clean up its rows.
for (project in projects) {
  count_matrix_df <- data.frame(assay(tcga_data[[project]], "unstranded"))

  # Remove duplicate entries: keep only the first of any rows whose counts are
  # identical across every sample
  count_matrix_df <- count_matrix_df[!duplicated(count_matrix_df), ]
  count_matrix <- data.matrix(count_matrix_df)

  # Normalise the gene identifiers with cleanid(), then discard every
  # identifier that occurs more than once afterwards (all copies are removed).
  gene_ids <- cleanid(rownames(count_matrix))
  rownames(count_matrix) <- gene_ids
  repeated_ids <- gene_ids[duplicated(gene_ids)]
  count_matrix <- count_matrix[!(gene_ids %in% repeated_ids), ]

  tcga_matrix[[project]] <- count_matrix
}

Format the samples table so that it can be fed as input to DESeq2.

# Reduce each sample table to the two columns used downstream (case, type),
# keyed by the value of the `cases` column, with short type labels.
for (project in projects) {
  sample_table <- samples[[project]]
  rownames(sample_table) <- sample_table$cases

  sample_table <- dplyr::select(
    sample_table,
    case = "cases.submitter_id",
    type = "sample_type"
  )

  sample_table$type <- sample_table$type %>%
    str_replace("Solid Tissue Normal", "normal") %>%
    str_replace("Primary Tumor", "tumor")

  samples[[project]] <- sample_table
}

DESeq2 requires that the row names of samples be identical to the column names of count_matrix, and in the same order.

# Put the count-matrix columns in the same order as the sample-table rows.
for (project in projects) {
  sample_barcodes <- rownames(samples[[project]])
  aligned_matrix <- tcga_matrix[[project]]

  # data.frame() turned the "-" in the barcodes into "."; restore them so the
  # columns can be selected by barcode.
  colnames(aligned_matrix) <- gsub("\\.", "-", colnames(aligned_matrix))
  tcga_matrix[[project]] <- aligned_matrix[, sample_barcodes]

  # Sanity check
  print(all(colnames(tcga_matrix[[project]]) == sample_barcodes))
}

IV. Differential gene expression analysis

For more detailed documentation on obtaining the gene set, refer to 7. Differential Gene Expression Analysis - TCGA - Pan-cancer - Unique Genes.Rmd.

# Directory prefix of the gene-list CSV files; it ends with "/" because file
# names are appended to it with paste0().
RCDdb <- "../data/public/rcd-gene-list/unique-genes/necroptosis-ferroptosis-pyroptosis/"

Write utility functions for filtering the gene sets, performing differential gene expression analysis, plotting the results, and performing variance-stabilizing transformation.

# Restrict each project's count matrix to a gene set, run DESeq2 (tumor vs.
# normal) and return the genes passing the fold-change and FDR thresholds.
#
# Reads the globals `projects`, `tcga_matrix` and `samples`.
#
# Args:
#   genes: data frame describing the gene set; must contain `gene_id` (matched
#     against the row names of the count matrices) and `gene` (the symbol
#     reported in the output).
#
# Returns:
#   One data frame for all projects with the DESeq2 statistics (baseMean,
#   log2FoldChange, lfcSE, stat, pvalue, padj) plus `cancer_type` and
#   `gene_symbol`, restricted to rows with |log2FoldChange| >= 1.5 and
#   padj < 0.05.
filter_gene_set_and_perform_dgea <- function(genes) {
  # Key the annotation by gene ID once, so symbols can be looked up by the row
  # names of the DESeq2 results.
  rownames(genes) <- genes$gene_id

  deseq.bbl.data <- list()

  for (project in projects) {
    # drop = FALSE keeps a matrix even when a single gene of the set is present.
    counts_rcd <- tcga_matrix[[project]][
      rownames(tcga_matrix[[project]]) %in% genes$gene_id,
      rownames(samples[[project]]),
      drop = FALSE
    ]

    print(project)
    print("=============")
    dds <- DESeqDataSetFromMatrix(
      countData = counts_rcd,
      colData = samples[[project]],
      design = ~type
    )
    dds <- filter_genes(dds, min_count = 10)
    # Make "normal" the reference level of the design factor.
    dds$type <- relevel(dds$type, ref = "normal")
    deseq.results <- results(DESeq(dds))

    deseq.bbl.data[[project]] <- data.frame(
      row.names = rownames(deseq.results),
      baseMean = deseq.results$baseMean,
      log2FoldChange = deseq.results$log2FoldChange,
      lfcSE = deseq.results$lfcSE,
      stat = deseq.results$stat,
      pvalue = deseq.results$pvalue,
      padj = deseq.results$padj,
      cancer_type = project,
      gene_symbol = genes[rownames(deseq.results), "gene"]
    )
  }

  deseq.bbl.data.combined <- bind_rows(deseq.bbl.data)

  # Rows whose log2FoldChange or padj is NA are dropped by filter() as well.
  dplyr::filter(
    deseq.bbl.data.combined,
    abs(log2FoldChange) >= 1.5 & padj < 0.05
  )
}
# Bubble plot of the strongest differentially expressed genes per cancer type.
#
# Args:
#   deseq.bbl.data.combined: data frame with at least the columns `padj`,
#     `log2FoldChange`, `cancer_type` and `gene_symbol` (the output of
#     filter_gene_set_and_perform_dgea()).
#
# Returns:
#   A ggplot object: one point per gene and cancer type, filled by log2 fold
#   change and sized by adjusted p-value category.
plot_dgea <- function(deseq.bbl.data.combined) {
  # Point size per adjusted p-value category (smaller p-value, larger point).
  sizes <- c("<10^-15" = 4, "10^-10" = 3, "10^-5" = 2, "0.05" = 1)

  # Left-closed bins: [-Inf, 1e-15), [1e-15, 1e-10), [1e-10, 1e-5), [1e-5, 0.05).
  deseq.bbl.data.combined <- deseq.bbl.data.combined %>%
    mutate(fdr_category = cut(padj,
      breaks = c(-Inf, 1e-15, 1e-10, 1e-5, 0.05),
      labels = c("<10^-15", "10^-10", "10^-5", "0.05"),
      right = FALSE
    ))

  # Keep the genes ranked in the top 10 by absolute fold change per cancer type.
  top_genes <- deseq.bbl.data.combined %>%
    group_by(cancer_type) %>%
    mutate(rank = rank(-abs(log2FoldChange))) %>%
    dplyr::filter(rank <= 10) %>%
    ungroup()

  # The colour scale spans all input genes, not only the plotted ones.
  fill_limits <- range(deseq.bbl.data.combined$log2FoldChange)

  ggplot(
    top_genes,
    aes(y = cancer_type, x = gene_symbol, size = fdr_category, fill = log2FoldChange)
  ) +
    geom_point(alpha = 0.5, shape = 21, color = "black") +
    scale_size_manual(values = sizes) +
    scale_fill_gradient2(low = "blue", mid = "white", high = "red", limits = fill_limits) +
    theme_minimal() +
    theme(
      axis.text.x = element_text(size = 9, angle = 90, hjust = 1),
      legend.position = "bottom"
    ) +
    labs(size = "Adjusted p-value", fill = "log2 FC", y = "Cancer type", x = "Gene")
}
# Variance-stabilise the counts of a gene set for every project.
#
# Reads the globals `projects`, `tcga_matrix` and `samples`.
#
# Args:
#   genes: data frame describing the gene set; must contain `gene_id` (matched
#     against the row names of the count matrices).
#
# Returns:
#   A list keyed by project; each element is the matrix returned by
#   assay(vst(...)) for that project.
perform_vsd <- function(genes) {
  vsd_rcd <- list()

  for (project in projects) {
    # drop = FALSE keeps a matrix even when a single gene of the set is present.
    counts_rcd <- tcga_matrix[[project]][
      rownames(tcga_matrix[[project]]) %in% genes$gene_id,
      rownames(samples[[project]]),
      drop = FALSE
    ]

    print(project)
    print("=============")
    dds <- DESeqDataSetFromMatrix(
      countData = counts_rcd,
      colData = samples[[project]],
      design = ~type
    )
    dds <- filter_genes(dds, min_count = 10)

    # Perform variance stabilization. nsub is set to the number of genes whose
    # mean normalized count exceeds 10, because the gene set may be smaller
    # than the default subset size of vst().
    dds <- estimateSizeFactors(dds)
    nsub <- sum(rowMeans(counts(dds, normalized = TRUE)) > 10)
    vsd <- vst(dds, nsub = nsub)
    vsd_rcd[[project]] <- assay(vsd)
  }

  vsd_rcd
}

Pyroptosis

Fetch the gene set of interest.

# Load the pyroptosis gene list and show it as read.
genes <- read.csv(paste0(RCDdb, "Pyroptosis.csv"))
print(genes)

# Normalise the gene IDs with cleanid(), keep the first row per ID, and drop
# rows without an ID.
genes <- genes %>%
  dplyr::mutate(gene_id = cleanid(gene_id)) %>%
  dplyr::distinct(gene_id, .keep_all = TRUE) %>%
  subset(gene_id != "")
genes

Filter the genes to include only those in the gene set of interest, and then perform differential gene expression analysis.

# Run DESeq2 on the pyroptosis genes; the helper keeps only the genes with
# |log2FoldChange| >= 1.5 and padj < 0.05.
deseq.bbl.data.combined <- filter_gene_set_and_perform_dgea(genes)
[1] "TCGA-COAD"
[1] "============="
Warning: some variables in design formula are characters, converting to factorsestimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
-- replacing outliers and refitting for 3 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)
estimating dispersions
fitting model and testing
# Display the filtered differential expression results.
deseq.bbl.data.combined

Plot the results.

# Bubble plot of the top-ranked genes by absolute log2 fold change.
plot_dgea(deseq.bbl.data.combined)

Perform variance-stabilizing transformation for further downstream analysis (i.e., for survival analysis).

# List keyed by project, each element a variance-stabilised expression matrix.
vsd <- perform_vsd(genes)
[1] "TCGA-COAD"
[1] "============="

V. Downloading the clinical data

Download clinical data from TCGA, and perform some preprocessing:

- The deceased column should be FALSE if the patient is alive and TRUE otherwise.
- The overall_survival column should reflect the follow-up time if the patient is alive and the days to death otherwise.

# Fetch the clinical table of a project and add two survival columns.
#
# Args:
#   project: GDC project identifier passed to GDCquery_clinic().
#
# Returns:
#   The clinical data frame with two extra columns:
#     deceased         - FALSE when vital_status is "Alive", TRUE for any other
#                        value, NA when vital_status is NA;
#     overall_survival - days_to_last_follow_up for "Alive" rows,
#                        days_to_death otherwise.
download_clinical_data <- function(project) {
  clinical <- GDCquery_clinic(project)

  is_alive <- clinical$vital_status == "Alive"
  clinical$deceased <- !is_alive
  clinical$overall_survival <- ifelse(
    is_alive,
    clinical$days_to_last_follow_up,
    clinical$days_to_death
  )

  clinical
}
# Clinical table of every project, keyed by project identifier.
tcga_clinical <- setNames(lapply(projects, download_clinical_data), projects)

VI. Performing survival analysis

Write utility functions for performing survival analysis.

# Build the per-patient table used for the survival analysis of one gene.
#
# Reads the globals `tcga_matrix`, `samples`, `genes` and `tcga_clinical`.
#
# Args:
#   gene_of_interest: gene symbol, compared against `genes$gene`.
#   project: project identifier used to index the global lists.
#
# Returns:
#   A data frame with one row per matched normal/tumor sample pair of a case,
#   holding the normal (`counts.x`) and tumor (`counts.y`) counts, their log2
#   ratio (`log_fold`), the stratum (`strata`: "HIGH" when |log_fold| >= 1.5,
#   "LOW" otherwise) and the clinical columns of that case. The table has zero
#   rows when the gene is not in the gene set or not in the count matrix.
construct_gene_df <- function(gene_of_interest, project) {
  count_matrix <- tcga_matrix[[project]]
  sample_table <- samples[[project]]

  # Select the gene first, so only its rows are reshaped rather than the whole
  # count matrix. which() drops NA comparisons.
  target_genes <- genes[which(genes$gene == gene_of_interest), , drop = FALSE]
  target_rows <- rownames(count_matrix) %in% target_genes$gene_id

  # Reduce a barcode to its first three "-"-separated fields, which is the form
  # matched against `submitter_id` in the clinical table below.
  truncate_barcode <- function(barcodes) {
    vapply(
      strsplit(as.character(barcodes), "-", fixed = TRUE),
      function(parts) paste(parts[1:3], collapse = "-"),
      character(1)
    )
  }

  # Long-format counts (gene_id, case_id, counts, annotation columns) of the
  # target gene for the samples of one type ("normal" or "tumor").
  long_counts <- function(sample_type) {
    barcodes <- rownames(sample_table)[which(sample_table$type == sample_type)]
    block <- count_matrix[
      target_rows,
      colnames(count_matrix) %in% barcodes,
      drop = FALSE
    ]
    # as.vector() walks the matrix column by column, hence rep(times)/rep(each).
    long <- data.frame(
      gene_id = rep(rownames(block), times = ncol(block)),
      case_id = rep(colnames(block), each = nrow(block)),
      counts = as.vector(block),
      stringsAsFactors = FALSE
    )
    long <- dplyr::inner_join(long, target_genes, by = "gene_id")
    long$case_id <- truncate_barcode(long$case_id)
    long
  }

  normal_df <- long_counts("normal")
  tumor_df <- long_counts("tumor")

  # counts.x = normal, counts.y = tumor.
  gene_df <- inner_join(
    normal_df,
    tumor_df,
    by = c(
      "gene_id", "case_id", "deathtype", "gene", "description",
      "gene_biotype", "pmid", "comment"
    )
  )

  # Raw-count ratio: a zero count gives Inf, -Inf or NaN here, and NaN yields
  # an NA stratum.
  gene_df$log_fold <- log2(gene_df$counts.y / gene_df$counts.x)
  gene_df$strata <- ifelse(abs(gene_df$log_fold) >= 1.5, "HIGH", "LOW")

  merge(gene_df, tcga_clinical[[project]], by.x = "case_id", by.y = "submitter_id")
}
# Kaplan-Meier fit of overall survival stratified by `strata`.
# The formula is written inline and the argument stays named `gene_df` so the
# call stored in the fit reads exactly as before.
compute_surival_fit <- function(gene_df) {
  survfit(Surv(overall_survival, deceased) ~ strata, data = gene_df)
}
# Cox proportional-hazards model of overall survival with `strata` as the only
# covariate.
compute_cox <- function(gene_df) {
  coxph(Surv(overall_survival, deceased) ~ strata, data = gene_df)
}
# Kaplan-Meier plot with p-value and risk table for a survfit object.
#
# Args:
#   fit: the survfit object to draw.
#   data: the data frame the fit was computed from. Defaults to the variable
#     `gene_df` visible when the function is called, which is what the function
#     previously always read; pass it explicitly to avoid that dependency.
#
# Returns:
#   The object returned by ggsurvplot().
plot_survival <- function(fit, data = gene_df) {
  ggsurvplot(fit,
    data = data,
    pval = TRUE,
    risk.table = TRUE,
    risk.table.height = 0.3
  )
}
# Test for a difference between the survival curves of the `strata` groups
# using survdiff() with its default settings.
compute_survival_diff <- function(gene_df) {
  survdiff(Surv(overall_survival, deceased) ~ strata, data = gene_df)
}

Perform survival analysis by testing for the difference in the Kaplan-Meier curves using the G-rho family of Harrington and Fleming tests: https://rdrr.io/cran/survival/man/survdiff.html

Our genes of interest are GSDMD (the primary executor of pyroptosis) and the differentially expressed genes.

# Project/gene pairs whose survival curves differ at p < 0.05 (parallel vectors).
significant_projects <- c()
significant_genes <- c()

# GSDMD first, then every symbol of the gene set; unique() keeps GSDMD from
# being analysed twice when the gene set already contains it.
# NOTE(review): the accompanying text speaks of "the differentially expressed
# genes", but every symbol in `genes$gene` is tested here - confirm which set
# is intended.
gene_symbols <- unique(c("GSDMD", genes$gene))
gene_symbols <- gene_symbols[!is.na(gene_symbols)]

# Running number of the analyses that completed.
ctr <- 1
for (project in projects) {
  for (gene in gene_symbols) {
    cat(project, gene, "\n\n")

    # `gene_df` is kept as a top-level variable because plot_survival() may
    # read it from there.
    gene_df <- tryCatch(
      construct_gene_df(gene, project),
      error = function(e) {
        message(
          "Could not build the expression table for ", gene, " in ", project,
          ": ", conditionMessage(e)
        )
        NULL
      }
    )

    if (!is.null(gene_df) && nrow(gene_df) > 0) {
      tryCatch(
        {
          fit <- compute_surival_fit(gene_df)
          survival <- compute_survival_diff(gene_df)
          cox <- compute_cox(gene_df)
          print(ctr)
          ctr <- ctr + 1
          print(survival)
          cat("\n")
          print(cox)
          print(plot_survival(fit))

          # Chi-square p-value with (number of groups - 1) degrees of freedom.
          p_value <- pchisq(
            survival$chisq,
            length(survival$n) - 1,
            lower.tail = FALSE
          )
          if (p_value < 0.05) {
            significant_projects <- c(significant_projects, project)
            significant_genes <- c(significant_genes, gene)
          }
        },
        error = function(e) {
          # Report the failure and continue with the next gene.
          message(
            "Survival analysis failed for ", gene, " in ", project,
            ": ", conditionMessage(e)
          )
        }
      )
    }

    cat("\n\n============================\n\n")
  }
}
TCGA-COAD GSDMD 

[1] 1
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

            N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 3        3     2.77   0.01966    0.0277
strata=LOW  9        9     9.23   0.00589    0.0277

 Chisq= 0  on 1 degrees of freedom, p= 0.9 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

             coef exp(coef) se(coef)      z     p
strataLOW -0.1159    0.8906   0.6967 -0.166 0.868

Likelihood ratio test=0.03  on 1 df, p=0.8689
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================

TCGA-COAD CHMP7 

[1] 2
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

            N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 3        3     3.08  0.001892   0.00279
strata=LOW  9        9     8.92  0.000652   0.00279

 Chisq= 0  on 1 degrees of freedom, p= 1 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

             coef exp(coef) se(coef)     z     p
strataLOW 0.03677   1.03746  0.69665 0.053 0.958

Likelihood ratio test=0  on 1 df, p=0.9578
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================

TCGA-COAD GSDMC 

[1] 3
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

            N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 8        8     7.56    0.0257    0.0852
strata=LOW  4        4     4.44    0.0438    0.0852

 Chisq= 0.1  on 1 degrees of freedom, p= 0.8 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

             coef exp(coef) se(coef)      z     p
strataLOW -0.2005    0.8183   0.6881 -0.291 0.771

Likelihood ratio test=0.09  on 1 df, p=0.7676
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================

TCGA-COAD ELANE 

[1] 4
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

             N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11       11    10.98  0.000036   0.00045
strata=LOW   1        1     1.02  0.000387   0.00045

 Chisq= 0  on 1 degrees of freedom, p= 1 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

              coef exp(coef) se(coef)      z     p
strataLOW -0.02281   0.97744  1.07582 -0.021 0.983

Likelihood ratio test=0  on 1 df, p=0.983
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================

TCGA-COAD IRF1 

[1] 5
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

            N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 6        6     6.54    0.0441     0.107
strata=LOW  6        6     5.46    0.0528     0.107

 Chisq= 0.1  on 1 degrees of freedom, p= 0.7 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

            coef exp(coef) se(coef)     z     p
strataLOW 0.1993    1.2206   0.6110 0.326 0.744

Likelihood ratio test=0.11  on 1 df, p=0.7437
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================

TCGA-COAD CYCS 

[1] 6
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

            N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 6        6     5.88   0.00260   0.00668
strata=LOW  6        6     6.12   0.00249   0.00668

 Chisq= 0  on 1 degrees of freedom, p= 0.9 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

              coef exp(coef) se(coef)      z     p
strataLOW -0.05431   0.94714  0.66451 -0.082 0.935

Likelihood ratio test=0.01  on 1 df, p=0.9348
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================

TCGA-COAD GSDMA 

[1] 7
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

            N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 7        7     6.78   0.00735    0.0207
strata=LOW  5        5     5.22   0.00954    0.0207

 Chisq= 0  on 1 degrees of freedom, p= 0.9 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

             coef exp(coef) se(coef)      z     p
strataLOW -0.0928    0.9114   0.6446 -0.144 0.886

Likelihood ratio test=0.02  on 1 df, p=0.8855
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================

TCGA-COAD CASP4 

[1] 8
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

            N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 4        4     6.11     0.728      1.94
strata=LOW  8        8     5.89     0.756      1.94

 Chisq= 1.9  on 1 degrees of freedom, p= 0.2 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

            coef exp(coef) se(coef)     z     p
strataLOW 1.0791    2.9421   0.8078 1.336 0.182

Likelihood ratio test=2.13  on 1 df, p=0.1449
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================

TCGA-COAD BAK1 

[1] 9
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

            N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 4        4     4.35    0.0276      0.05
strata=LOW  8        8     7.65    0.0157      0.05

 Chisq= 0  on 1 degrees of freedom, p= 0.8 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

            coef exp(coef) se(coef)     z     p
strataLOW 0.1459    1.1571   0.6533 0.223 0.823

Likelihood ratio test=0.05  on 1 df, p=0.8225
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================

TCGA-COAD NOD1 

[1] 10
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

            N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 3        3     1.95     0.569     0.759
strata=LOW  9        9    10.05     0.110     0.759

 Chisq= 0.8  on 1 degrees of freedom, p= 0.4 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

             coef exp(coef) se(coef)      z     p
strataLOW -0.6303    0.5324   0.7350 -0.858 0.391

Likelihood ratio test=0.69  on 1 df, p=0.4063
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================

TCGA-COAD NLRP7 

[1] 11
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

             N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 11       11   11.615    0.0325      1.09
strata=LOW   1        1    0.385    0.9804      1.09

 Chisq= 1.1  on 1 degrees of freedom, p= 0.3 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

           coef exp(coef) se(coef)     z     p
strataLOW 1.142     3.134    1.156 0.988 0.323

Likelihood ratio test=0.79  on 1 df, p=0.374
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================

TCGA-COAD CASP3 

[1] 12
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

            N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 3        3     1.98     0.521     0.741
strata=LOW  9        9    10.02     0.103     0.741

 Chisq= 0.7  on 1 degrees of freedom, p= 0.4 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

             coef exp(coef) se(coef)      z     p
strataLOW -0.6540    0.5200   0.7729 -0.846 0.397

Likelihood ratio test=0.68  on 1 df, p=0.4079
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================

TCGA-COAD GSDMB 

[1] 13
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

            N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 6        6     5.77   0.00891    0.0191
strata=LOW  6        6     6.23   0.00826    0.0191

 Chisq= 0  on 1 degrees of freedom, p= 0.9 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

              coef exp(coef) se(coef)      z    p
strataLOW -0.08464   0.91884  0.61184 -0.138 0.89

Likelihood ratio test=0.02  on 1 df, p=0.8899
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================

TCGA-COAD GZMB 

[1] 14
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

            N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 6        6     7.39     0.260     0.809
strata=LOW  6        6     4.61     0.417     0.809

 Chisq= 0.8  on 1 degrees of freedom, p= 0.4 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

            coef exp(coef) se(coef)     z     p
strataLOW 0.5866    1.7978   0.6605 0.888 0.375

Likelihood ratio test=0.81  on 1 df, p=0.3685
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================

TCGA-COAD GSDME 

[1] 15
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

            N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 4        4     3.61    0.0427     0.065
strata=LOW  8        8     8.39    0.0183     0.065

 Chisq= 0.1  on 1 degrees of freedom, p= 0.8 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

             coef exp(coef) se(coef)      z     p
strataLOW -0.1617    0.8507   0.6347 -0.255 0.799

Likelihood ratio test=0.06  on 1 df, p=0.8004
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================

TCGA-COAD CHMP3 

[1] 16
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

            N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 3        3     1.98     0.521     0.741
strata=LOW  9        9    10.02     0.103     0.741

 Chisq= 0.7  on 1 degrees of freedom, p= 0.4 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

             coef exp(coef) se(coef)      z     p
strataLOW -0.6540    0.5200   0.7729 -0.846 0.397

Likelihood ratio test=0.68  on 1 df, p=0.4079
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================

TCGA-COAD DPP9 

[1] 17
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

             N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH  1        1    0.653    0.1841     0.207
strata=LOW  11       11   11.347    0.0106     0.207

 Chisq= 0.2  on 1 degrees of freedom, p= 0.6 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

             coef exp(coef) se(coef)      z     p
strataLOW -0.4955    0.6093   1.0988 -0.451 0.652

Likelihood ratio test=0.18  on 1 df, p=0.6697
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================

TCGA-COAD NOD2 

[1] 18
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

            N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 5        5     5.88     0.133     0.297
strata=LOW  7        7     6.12     0.128     0.297

 Chisq= 0.3  on 1 degrees of freedom, p= 0.6 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

            coef exp(coef) se(coef)     z     p
strataLOW 0.3458    1.4132   0.6372 0.543 0.587

Likelihood ratio test=0.3  on 1 df, p=0.5824
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================

TCGA-COAD NLRC4 

[1] 19
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

            N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 5        5     5.26    0.0129    0.0271
strata=LOW  7        7     6.74    0.0101    0.0271

 Chisq= 0  on 1 degrees of freedom, p= 0.9 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

            coef exp(coef) se(coef)     z     p
strataLOW 0.1049    1.1106   0.6381 0.164 0.869

Likelihood ratio test=0.03  on 1 df, p=0.8688
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================

TCGA-COAD GSDMD 

[1] 20
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

            N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 3        3     2.77   0.01966    0.0277
strata=LOW  9        9     9.23   0.00589    0.0277

 Chisq= 0  on 1 degrees of freedom, p= 0.9 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

             coef exp(coef) se(coef)      z     p
strataLOW -0.1159    0.8906   0.6967 -0.166 0.868

Likelihood ratio test=0.03  on 1 df, p=0.8689
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================

TCGA-COAD TIRAP 

[1] 21
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

            N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 4        4     3.59    0.0476    0.0767
strata=LOW  8        8     8.41    0.0203    0.0767

 Chisq= 0.1  on 1 degrees of freedom, p= 0.8 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

             coef exp(coef) se(coef)      z     p
strataLOW -0.1816    0.8339   0.6566 -0.277 0.782

Likelihood ratio test=0.08  on 1 df, p=0.7834
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================

TCGA-COAD SCAF11 

[1] 22
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

            N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 3        3     3.08  0.001892   0.00279
strata=LOW  9        9     8.92  0.000652   0.00279

 Chisq= 0  on 1 degrees of freedom, p= 1 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

             coef exp(coef) se(coef)     z     p
strataLOW 0.03677   1.03746  0.69665 0.053 0.958

Likelihood ratio test=0  on 1 df, p=0.9578
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================

TCGA-COAD NLRP6 

[1] 23
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

            N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 9        9     8.11    0.0984     0.353
strata=LOW  3        3     3.89    0.2048     0.353

 Chisq= 0.4  on 1 degrees of freedom, p= 0.6 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

             coef exp(coef) se(coef)     z     p
strataLOW -0.4170    0.6590   0.7063 -0.59 0.555

Likelihood ratio test=0.36  on 1 df, p=0.5458
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================

TCGA-COAD AIM2 

[1] 24
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

            N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 7        7      3.9      2.47      4.69
strata=LOW  5        5      8.1      1.19      4.69

 Chisq= 4.7  on 1 degrees of freedom, p= 0.03 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

             coef exp(coef) se(coef)      z      p
strataLOW -1.6468    0.1927   0.8295 -1.985 0.0471

Likelihood ratio test=4.82  on 1 df, p=0.02808
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================

TCGA-COAD CASP6 

[1] 25
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

            N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 3        3     1.98     0.521     0.741
strata=LOW  9        9    10.02     0.103     0.741

 Chisq= 0.7  on 1 degrees of freedom, p= 0.4 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

             coef exp(coef) se(coef)      z     p
strataLOW -0.6540    0.5200   0.7729 -0.846 0.397

Likelihood ratio test=0.68  on 1 df, p=0.4079
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================

TCGA-COAD NLRP2 



============================

TCGA-COAD IRF2 

[1] 26
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

            N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 3        3     1.98     0.521     0.741
strata=LOW  9        9    10.02     0.103     0.741

 Chisq= 0.7  on 1 degrees of freedom, p= 0.4 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

             coef exp(coef) se(coef)      z     p
strataLOW -0.6540    0.5200   0.7729 -0.846 0.397

Likelihood ratio test=0.68  on 1 df, p=0.4079
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================

TCGA-COAD PJVK 

[1] 27
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

            N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 3        3     3.27   0.02179    0.0321
strata=LOW  9        9     8.73   0.00815    0.0321

 Chisq= 0  on 1 degrees of freedom, p= 0.9 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

            coef exp(coef) se(coef)     z     p
strataLOW 0.1234    1.1313   0.6889 0.179 0.858

Likelihood ratio test=0.03  on 1 df, p=0.8566
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================

TCGA-COAD CASP5 

[1] 28
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

            N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 9        9     9.43    0.0194    0.0986
strata=LOW  3        3     2.57    0.0713    0.0986

 Chisq= 0.1  on 1 degrees of freedom, p= 0.8 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

            coef exp(coef) se(coef)     z     p
strataLOW 0.2187    1.2445   0.6979 0.313 0.754

Likelihood ratio test=0.1  on 1 df, p=0.7577
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================

TCGA-COAD NLRP1 

[1] 29
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

            N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 8        8     7.41    0.0473     0.148
strata=LOW  4        4     4.59    0.0763     0.148

 Chisq= 0.1  on 1 degrees of freedom, p= 0.7 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

             coef exp(coef) se(coef)      z     p
strataLOW -0.2624    0.7692   0.6840 -0.384 0.701

Likelihood ratio test=0.15  on 1 df, p=0.696
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================

TCGA-COAD CASP9 

[1] 30
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

            N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 3        3     1.98     0.521     0.741
strata=LOW  9        9    10.02     0.103     0.741

 Chisq= 0.7  on 1 degrees of freedom, p= 0.4 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

             coef exp(coef) se(coef)      z     p
strataLOW -0.6540    0.5200   0.7729 -0.846 0.397

Likelihood ratio test=0.68  on 1 df, p=0.4079
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================

TCGA-COAD PLCG1 

[1] 31
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

            N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 3        3      1.8     0.792      1.04
strata=LOW  9        9     10.2     0.140      1.04

 Chisq= 1  on 1 degrees of freedom, p= 0.3 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

             coef exp(coef) se(coef)      z     p
strataLOW -0.7354    0.4793   0.7377 -0.997 0.319

Likelihood ratio test=0.92  on 1 df, p=0.3372
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================

TCGA-COAD IL18 

[1] 32
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

            N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 6        6     5.54    0.0387    0.0793
strata=LOW  6        6     6.46    0.0332    0.0793

 Chisq= 0.1  on 1 degrees of freedom, p= 0.8 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

             coef exp(coef) se(coef)      z     p
strataLOW -0.1718    0.8422   0.6107 -0.281 0.778

Likelihood ratio test=0.08  on 1 df, p=0.7781
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================

TCGA-COAD DPP8 

[1] 33
Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

            N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 4        4     5.18     0.269     0.525
strata=LOW  8        8     6.82     0.204     0.525

 Chisq= 0.5  on 1 degrees of freedom, p= 0.5 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

            coef exp(coef) se(coef)     z     p
strataLOW 0.4584    1.5815   0.6373 0.719 0.472

Likelihood ratio test=0.53  on 1 df, p=0.4649
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================

Display the results only for genes where a significant difference in survival has been reported.

significant_genes
[1] "AIM2"
# Re-run the survival analysis for each significant project/gene pair and
# print the log-rank test, the Cox model, and the survival plot for it.
# NOTE(review): significant_projects is indexed in parallel with
# significant_genes; presumably both were filled together upstream -- confirm.
num_significant_genes <- length(significant_genes)

# seq_len() yields an empty sequence when there are no significant genes, so
# the loop body is skipped without needing a separate `if` guard.
for (i in seq_len(num_significant_genes)) {
  project <- significant_projects[[i]]
  gene <- significant_genes[[i]]

  cat(project, gene, "\n\n")
  gene_df <- construct_gene_df(gene, project)

  survival <- compute_survival_diff(gene_df)
  cox <- compute_cox(gene_df)

  # Fit the survival curves for THIS gene. Without refitting here, the plot
  # below would be drawn from a stale `fit` left over from an earlier chunk
  # (i.e. a different gene), or fail if no such object exists.
  # The formula matches the one used by the survdiff/coxph calls.
  fit <- survfit(Surv(overall_survival, deceased) ~ strata, data = gene_df)

  print(survival)
  cat("\n")
  print(cox)
  print(plot_survival(fit))

  cat("\n\n============================\n\n")
}
TCGA-COAD AIM2 

Call:
survdiff(formula = Surv(overall_survival, deceased) ~ strata, 
    data = gene_df)

n=12, 34 observations deleted due to missingness.

            N Observed Expected (O-E)^2/E (O-E)^2/V
strata=HIGH 7        7      3.9      2.47      4.69
strata=LOW  5        5      8.1      1.19      4.69

 Chisq= 4.7  on 1 degrees of freedom, p= 0.03 

Call:
coxph(formula = Surv(overall_survival, deceased) ~ strata, data = gene_df)

             coef exp(coef) se(coef)      z      p
strataLOW -1.6468    0.1927   0.8295 -1.985 0.0471

Likelihood ratio test=4.82  on 1 df, p=0.02808
n= 12, number of events= 12 
   (34 observations deleted due to missingness)


============================


  1. De La Salle University, Manila, Philippines, ↩︎

  2. De La Salle University, Manila, Philippines, ↩︎

